{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 用户和活动关联关系处理\n",
    "\n",
    "\n",
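    "整个数据集中的活动数目（events.csv）太多，所以我们只保留在训练集和测试集中出现过的活动和用户，并对它们重新编制索引。\n",
    "\n",
    "本 notebook 读取 events_select.csv，以活动描述的词频特征（c_1 … c_100、c_other）对活动做 MiniBatchKMeans 聚类，并用 Calinski-Harabasz 指数比较不同的聚类数目 K。"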
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "from sklearn.cluster import MiniBatchKMeans\n",
    "from sklearn import metrics\n",
    "\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>event_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>start_time</th>\n",
       "      <th>city</th>\n",
       "      <th>state</th>\n",
       "      <th>zip</th>\n",
       "      <th>country</th>\n",
       "      <th>lat</th>\n",
       "      <th>lng</th>\n",
       "      <th>c_1</th>\n",
       "      <th>...</th>\n",
       "      <th>c_92</th>\n",
       "      <th>c_93</th>\n",
       "      <th>c_94</th>\n",
       "      <th>c_95</th>\n",
       "      <th>c_96</th>\n",
       "      <th>c_97</th>\n",
       "      <th>c_98</th>\n",
       "      <th>c_99</th>\n",
       "      <th>c_100</th>\n",
       "      <th>c_other</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>684921758</td>\n",
       "      <td>3647864012</td>\n",
       "      <td>2012-10-31T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>244999119</td>\n",
       "      <td>3476440521</td>\n",
       "      <td>2012-11-03T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3928440935</td>\n",
       "      <td>517514445</td>\n",
       "      <td>2012-11-05T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2582345152</td>\n",
       "      <td>781585781</td>\n",
       "      <td>2012-10-30T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1051165850</td>\n",
       "      <td>1016098580</td>\n",
       "      <td>2012-09-27T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 110 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     event_id     user_id                start_time city state  zip country  \\\n",
       "0   684921758  3647864012  2012-10-31T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "1   244999119  3476440521  2012-11-03T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "2  3928440935   517514445  2012-11-05T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "3  2582345152   781585781  2012-10-30T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "4  1051165850  1016098580  2012-09-27T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "\n",
       "   lat  lng  c_1   ...     c_92  c_93  c_94  c_95  c_96  c_97  c_98  c_99  \\\n",
       "0  NaN  NaN    2   ...        0     1     0     0     0     0     0     0   \n",
       "1  NaN  NaN    2   ...        0     0     0     0     0     0     0     0   \n",
       "2  NaN  NaN    0   ...        0     0     0     0     0     0     0     0   \n",
       "3  NaN  NaN    1   ...        0     0     0     0     0     0     0     0   \n",
       "4  NaN  NaN    1   ...        0     0     0     0     0     0     0     0   \n",
       "\n",
       "   c_100  c_other  \n",
       "0      0        9  \n",
       "1      0        7  \n",
       "2      0       12  \n",
       "3      0        8  \n",
       "4      0        9  \n",
       "\n",
       "[5 rows x 110 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Event descriptions come from events.csv: 110 columns in total.\n",
    "# First 9 columns: event_id, user_id, start_time, city, state, zip, country, lat, lng.\n",
    "#   event_id   : id of the event\n",
    "#   user_id    : id of the user who created the event\n",
    "#   city, state, zip, country : more details about the location of the venue (if known)\n",
    "#   lat, lng   : floats, latitude and longitude coordinates of the venue\n",
    "#   start_time : string, ISO-8601 UTC time at which the event starts\n",
    "#\n",
    "# Remaining 101 columns are word counts: c_1, c_2, ..., c_100, c_other\n",
    "#   c_N     : number of times the N-th word appears in the event description\n",
    "#   c_other : number of occurrences of words outside the 100 most common ones\n",
    "\n",
    "# Load the data (events_select.csv is presumably the train/test subset of events.csv -- confirm)\n",
    "events = pd.read_csv(\"events_select.csv\")\n",
    "events.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Clustering features: drop the id/time/location columns so only the word counts remain\n",
    "events_fature = events.drop(\n",
    "    labels=['event_id', 'user_id', 'start_time', 'city', 'state', 'zip', 'country', 'lat', 'lng'],\n",
    "    axis='columns',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>c_1</th>\n",
       "      <th>c_2</th>\n",
       "      <th>c_3</th>\n",
       "      <th>c_4</th>\n",
       "      <th>c_5</th>\n",
       "      <th>c_6</th>\n",
       "      <th>c_7</th>\n",
       "      <th>c_8</th>\n",
       "      <th>c_9</th>\n",
       "      <th>c_10</th>\n",
       "      <th>...</th>\n",
       "      <th>c_92</th>\n",
       "      <th>c_93</th>\n",
       "      <th>c_94</th>\n",
       "      <th>c_95</th>\n",
       "      <th>c_96</th>\n",
       "      <th>c_97</th>\n",
       "      <th>c_98</th>\n",
       "      <th>c_99</th>\n",
       "      <th>c_100</th>\n",
       "      <th>c_other</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 101 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   c_1  c_2  c_3  c_4  c_5  c_6  c_7  c_8  c_9  c_10   ...     c_92  c_93  \\\n",
       "0    2    0    2    0    0    0    0    0    0     0   ...        0     1   \n",
       "1    2    0    2    0    0    0    0    0    0     0   ...        0     0   \n",
       "2    0    0    0    0    0    0    0    0    0     0   ...        0     0   \n",
       "3    1    0    2    1    0    0    0    0    0     0   ...        0     0   \n",
       "4    1    1    0    0    0    0    0    2    0     0   ...        0     0   \n",
       "\n",
       "   c_94  c_95  c_96  c_97  c_98  c_99  c_100  c_other  \n",
       "0     0     0     0     0     0     0      0        9  \n",
       "1     0     0     0     0     0     0      0        7  \n",
       "2     0     0     0     0     0     0      0       12  \n",
       "3     0     0     0     0     0     0      0        8  \n",
       "4     0     0     0     0     0     0      0        9  \n",
       "\n",
       "[5 rows x 101 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first rows of the clustering feature table\n",
    "events_fature.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(13418, 101)\n"
     ]
    }
   ],
   "source": [
    "# Print the (rows, columns) shape of the clustering feature table\n",
    "print(events_fature.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def K_cluster_analysis(K, X_train, random_state=None):\n",
    "    \"\"\"Fit MiniBatchKMeans with K clusters and score it with the Calinski-Harabasz index.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    K : int\n",
    "        Number of clusters, passed to ``MiniBatchKMeans(n_clusters=K)``.\n",
    "    X_train : array-like\n",
    "        Samples to cluster; the same data is used for fitting and for scoring.\n",
    "    random_state : int or None, optional\n",
    "        Seed forwarded to ``MiniBatchKMeans``. The default ``None`` keeps the\n",
    "        original unseeded behaviour; pass an int to get repeatable scores.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    float\n",
    "        Calinski-Harabasz score of the fitted clustering (larger is better).\n",
    "    \"\"\"\n",
    "    start = time.time()\n",
    "\n",
    "    print(\"K-means begin with clusters: {}\".format(K))\n",
    "\n",
    "    # Fit on the training data, then label that same data for scoring\n",
    "    mb_kmeans = MiniBatchKMeans(n_clusters=K, random_state=random_state)\n",
    "    mb_kmeans.fit(X_train)\n",
    "    labels = mb_kmeans.predict(X_train)\n",
    "\n",
    "    # scikit-learn 0.20 renamed calinski_harabaz_score to calinski_harabasz_score\n",
    "    # and 0.23 removed the old spelling, so use whichever name this version has.\n",
    "    # (metrics.silhouette_score is another common criterion for choosing K.)\n",
    "    ch_scorer = getattr(metrics, \"calinski_harabasz_score\", None)\n",
    "    if ch_scorer is None:\n",
    "        ch_scorer = metrics.calinski_harabaz_score\n",
    "    CH_score = ch_scorer(X_train, labels)\n",
    "\n",
    "    end = time.time()\n",
    "    print(\"CH_score: {}, time elaps:{}\".format(CH_score, int(end - start)))\n",
    "\n",
    "    return CH_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "K-means begin with clusters: 10\n",
      "CH_score: 1140.9063780073727, time elaps:0\n",
      "K-means begin with clusters: 20\n",
      "CH_score: 590.7056344385427, time elaps:0\n",
      "K-means begin with clusters: 30\n",
      "CH_score: 353.07425020259404, time elaps:0\n",
      "K-means begin with clusters: 40\n",
      "CH_score: 312.9594946757888, time elaps:0\n",
      "K-means begin with clusters: 50\n",
      "CH_score: 208.7924989744064, time elaps:0\n",
      "K-means begin with clusters: 60\n",
      "CH_score: 385.23768271194535, time elaps:0\n",
      "K-means begin with clusters: 70\n",
      "CH_score: 484.73763001585627, time elaps:0\n",
      "K-means begin with clusters: 80\n",
      "CH_score: 160.69235505394192, time elaps:0\n",
      "K-means begin with clusters: 90\n",
      "CH_score: 134.38327116328236, time elaps:0\n",
      "K-means begin with clusters: 100\n",
      "CH_score: 176.86254029904805, time elaps:0\n"
     ]
    }
   ],
   "source": [
    "# Candidate cluster counts K to search: 10, 20, ..., 100\n",
    "Ks = np.arange(10, 101, 10)\n",
    "# Score every candidate K on the word-count features\n",
    "CH_scores = [K_cluster_analysis(K, events_fature) for K in Ks]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3XmUFOW5x/HvwyogsoM4oICgIEbE\njApqcAFR3GhuolGTyOW6Jq7BxKAx4prEqHFNVCJGTLxioiKIWwQVBAPXARQkiCAqm8Igi7ts7/3j\nqTkzwMDAdE9Xd9fvc86cma6u6X5oavrX9da7WAgBERFJnlpxFyAiIvFQAIiIJJQCQEQkoRQAIiIJ\npQAQEUkoBYCISEIpAEREEkoBICKSUAoAEZGEqhN3ATvSsmXL0KFDh7jLEBHJKzNmzFgVQmhV1X45\nHQAdOnSgpKQk7jJERPKKmX20M/upCUhEJKEUACIiCaUAEBFJKAWAiEhCKQBERBJKASAiklAKABGR\nhCrIAFi8GIYNg+XL465ERCR3FWQAfPEF3HorjBsXdyUiIrmrIAOgWzfo0gWeeSbuSkREcldBBoAZ\npFLwyiuwbl3c1YiI5KaCDADwANiwAZ5/Pu5KRERyU8EGwOGHQ5s2agYSEdmegg2A2rXhtNP8DODb\nb+OuRkQk9xRsAIA3A33xhV8LEBGRLRV0ABx3HOy+u5qBREQqU9ABsNtuMGAAjB0LmzfHXY2ISG4p\n6AAAGDQIVqyA6dPjrkREJLcUfACcdBLUrQtjxsRdiYhIbin4AGjSBI491gMghLirERHJHQUfAOC9\ngRYuhHnz4q5ERCR3JCIATjvNv6s3kIhIuUQEQFERHHaYAkBEpKJEBAB4M9Cbb8LSpXFXIiKSGxIV\nAKA1AkREyiQmALp1g/33VzOQiEiZxAQA+FnAq6/C2rVxVyIiEr/EBcDGjfDcc3FXIiISv0QFwGGH\nwZ57qhlIRAQSFgC1asHAgfDCC/DNN3FXIyISr0QFAHgz0JdfwsSJcVciIhKvxAXAscdC48ZqBhIR\nqTIAzOxhM1tpZu9U2NbczF42swXR92bRdjOze8xsoZnNNrNDKvzO4Gj/BWY2uGb+OVWrX99nCB03\nDjZtiqsKEZH47cwZwCPAiVttGwZMDCF0ASZGtwEGAF2irwuA+8EDAxgOHA4cBgwvC404DBoEK1fC\ntGlxVSAiEr8qAyCEMBlYvdXmgcCo6OdRQKrC9keDmwY0NbO2wAnAyyGE1SGENcDLbBsqWTNggK8R\noGYgEUmy6l4DaBNC+Bgg+t462l4ELKmw39Jo2/a2x2KPPaBvX60RICLJlumLwFbJtrCD7ds+gNkF\nZlZiZiWlpaUZLa6iVArefx/mzq2xpxARyWnVDYAVUdMO0feV0falQPsK+7UDlu9g+zZCCCNCCMUh\nhOJWrVpVs7yqaY0AEUm66gbAOKCsJ89gYGyF7edEvYF6AeuiJqKXgP5m1iy6+Ns/2habtm2hVy8F\ngIgk1850A30c+Dewv5ktNbNzgd8Dx5vZAuD46DbA88AiYCHwF+BnACGE1cBNwJvR143RtlilUjBj\nBixZUvW+IiKFxkIOXwUtLi4OJSUlNfb48+dD165w771wySU19jQiIlllZjNCCMVV7Ze4kcAV7b+/\nrxOgZiARSaJEBwB4M9Brr8GaNXFXIiKSXQqAlE8JoTUCRCRpEh8AxcWw114+KExEJEkSHwBlawS8\n+CJ8/XXc1YiIZE/iAwC8Geirr2DChLgrERHJHgUAcMwxPj+QegOJSJIoAIB69eDkk7VGgIgkiwIg\nkkrBqlXwxhtxVyIikh0KgMiAAX4moGYgEUkKBUCkcWPo188DIIdnxxARyRgFQAWpFCxaBHPmxF2J\niEjNUwBUcOqpYKZmIBFJBgVABXvuCb
17KwBEJBkUAFtJpWDWLPjoo7grERGpWQqAraRS/n3s2B3v\nJyKS7xQAW+nSBQ44QM1AIlL4FACVGDQIJk+GTz+NuxIRkZqjAKiE1ggQkSRQAFTiu9+FoiI1A4lI\nYVMAVMLMzwJefNGniRYRKUQKgO1IpXyBmJdfjrsSEZGaoQDYjqOPhiZN1AwkIoVLAbAddevCKafA\ns8/Cxo1xVyMiknkKgB1Ipbwr6NSpcVciIpJ5CoAdOPFEqF9fzUAiUpgUADuw++5w/PFaI0BECpMC\noAqpFHz4IcyeHXclIiKZpQCoQtkaAWPGxF2JiEhmKQCq0Lo1HHmkrgOISOFRAOyEVArefhs++CDu\nSkREMietADCzn5vZXDN7x8weN7PdzKyjmU03swVm9oSZ1Yv2rR/dXhjd3yET/4BsGDjQv2uNABEp\nJNUOADMrAi4DikMIBwK1gTOBW4E7QwhdgDXAudGvnAusCSF0Bu6M9ssLnTvDgQeqGUhECku6TUB1\ngAZmVgdoCHwMHAc8Gd0/CojW2GJgdJvo/r5mZmk+f9akUvD667BqVdyViIhkRrUDIISwDLgdWIy/\n8a8DZgBrQwhlkycsBYqin4uAJdHvboz2b7H145rZBWZWYmYlpaWl1S0v4wYNgs2bYfz4uCsREcmM\ndJqAmuGf6jsCewGNgAGV7Fo2hKqyT/vbDK8KIYwIIRSHEIpbtWpV3fIyrmdPaN9ezUAiUjjSaQLq\nB3wQQigNIWwAngaOAJpGTUIA7YDl0c9LgfYA0f1NgNVpPH9Wla0R8K9/aY0AESkM6QTAYqCXmTWM\n2vL7Av8BXgV+EO0zGCjrOzMuuk10/ysh5NcEC2VrBLz0UtyViIikL51rANPxi7kzgTnRY40AfgUM\nNbOFeBv/yOhXRgItou1DgWFp1B2L730PmjVTM5CIFIY6Ve+yfSGE4cDwrTYvAg6rZN9vgNPTeb64\nbb1GQJ20Xj0RkXhpJPAuSqVgzRrvEioiks8UALvohBNgt93UDCQi+U8BsIsaNYL+/bVGgIjkPwVA\nNaRSsHgxvPVW3JWIiFSfAqAaTjkFatVSM5CI5DcFQDW0agVHHaVFYkQkvykAqimVgjlz4P33465E\nRKR6FADVpDUCRCTfKQCqqVMnOOggXQcQkfylAEhDKgVTp8LKlXFXIiKy6xQAadAaASKSzxQAaejR\nA/bZR81AIpKfFABpqLhGwBdfxF2NiMiuUQCkKZWCb7/1EBARyScKgDQddRQ0b65BYSKSfxQAaapT\nB0491S8Eb9gQdzUiIjtPAZABqRSsXQuTJ8ddiYjIzlMAZED//tCggXoDiUh+UQBkQMOGvlCM1ggQ\nkXyiAMiQVAqWLoWZM+OuRERk5ygAMkRrBIhIvlEAZEiLFtCnjwJARPKHAiCDUil45x1YuDDuSkRE\nqqYAyKCyNQJ0FiAi+UABkEEdOsDBBysARCQ/KAAyLJWCN96AFSvirkREZMcUABmWSvlYgGefjbsS\nEZEdUwBk2EEHQceOagYSkdynAMiwsjUCJkyAzz+PuxoRke1TANSAsjUCXnop7kpERLYvrQAws6Zm\n9qSZvWtm88yst5k1N7OXzWxB9L1ZtK+Z2T1mttDMZpvZIZn5J+SeI46Ali3VDCQiuS3dM4C7gRdD\nCF2BHsA8YBgwMYTQBZgY3QYYAHSJvi4A7k/zuXNWxTUC1q+PuxoRkcpVOwDMbA+gDzASIISwPoSw\nFhgIjIp2GwWkop8HAo8GNw1oamZtq115jkulYN06mDQp7kpERCqXzhlAJ6AU+KuZzTKzh8ysEdAm\nhPAxQPS9dbR/EbCkwu8vjbZtwcwuMLMSMyspLS1No7x4HX+8TxOtZiARyVXpBEAd4BDg/hBCT+BL\nypt7KmOVbNtm9vwQwogQQnEIobhVq1ZplBevBg3K1wjYvDnuakREtpVOACwFloYQpke3n8QDYUVZ\n00
70fWWF/dtX+P12wPI0nj/nnXkmLF8O990XdyUiItuqdgCEED4BlpjZ/tGmvsB/gHHA4GjbYGBs\n9PM44JyoN1AvYF1ZU1GhOv10vxj8y1/CW2/FXY2IyJbqpPn7lwKPmVk9YBEwBA+Vf5jZucBi4PRo\n3+eBk4CFwFfRvgXNDB5+GHr0gLPOgpISaNQo7qpERFxaARBCeAsoruSuvpXsG4CL03m+fNSyJfzt\nb9CvH1xxBfzlL3FXJCLiNBI4C447DoYNg4cegn/+M+5qREScAiBLbrgBDj8czj8fPvoo7mpERBQA\nWVO3Lvzv/3qX0LPPho0b465IRJJOAZBFnTrBgw/6gjE33RR3NSKSdAqALDvrLBg8GG6+GSZPjrsa\nEUkyBUAM7r0X9t0XfvQjWL067mpEJKkUADFo3Bgef9zXDT7vPF9CUkQk2xQAMfnud+F3v4MxY2DE\niLirEZEkUgDE6Oc/h/79fYDY3LlxVyMiSaMAiFGtWjBqFOyxh18c/vrruCsSkSRRAMRszz09BObM\n8UnjRESyRQGQA048EYYOhT/9CcaNi7saEUkKBUCO+O1voWdPGDIEli2LuxoRSQIFQI6oXx9Gj4Zv\nv4Wf/AQ2bYq7IhEpdAqAHLLffj5I7NVX4dZb465GRAqdAiDH/Pd/+1KS110H06bFXY2IFDIFQI4x\ngwcegPbtvWvounVxVySFbPp0uOoq+PzzuCuROCgAclCTJj519JIlcNFFmipCMm/5cp+UsFcvuO02\nuP32uCuSOCgAclTv3r6IzOjRPk5AJBO++cZ7nO23nx9bw4ZBKgV33OFzU0myKABy2LBhcMwxcMkl\n8N57cVcj+SwEePpp6NYNfv1rn4Jk3jyfj+rWWz0Ybrkl7iol2xQAOax2bfj7372L6FlneRdRkV01\nezb07Qvf/z7svjtMmOBh0KmT37/ffnDuuX7t6YMP4q1VsksBkOOKiuDhh2HmTP/kJrKzVq2Cn/3M\nBxi+/baPNJ81y8Nga9dd5x84hg/Pfp0SHwVAHhg40P+Q77gDXnwx7mok123YAPfcA126+FTjF18M\nCxb4MVSnTuW/U1QEl13mZ5xz5mS3XomPAiBP3H47HHig99zQxTrZnpdegh494PLL4dBD/ZP/PfdA\n8+ZV/+6vfuU90HSmmRwKgDzRoIH32vjsMw+BzZvjrkhyyYIFcOqpPrHg+vUwdqyHQffuO/8YzZt7\nCDz7LEydWnO1Su5QAOSR7t3hzjv9D/uuu+KuRnLBunU+jXj37jBpEvzhD7640Gmn+aDCXXXZZdC2\nrfdA0/iTwqcAyDMXXuj9tocNgxkz4q5G4rJpE4wc6T147rjDJxB87z0Pg/r1q/+4DRv6BeEpU+CF\nFzJXr+QmCzkc88XFxaGkpCTuMnLO6tXeztuggfcO2n33uCuSbJoyxdv4Z86EI46Au++G4uLMPf6G\nDT5eoFEj7zVUSx8T846ZzQghVHlU6L82DzVv7r01Fi6ESy+NuxrJlsWLfaLA730PVq706UKmTMns\nmz9A3bpw880+fmD06Mw+tuQWBUCeOvpouPZaeOQRePzxuKuRmvTVV3D99dC1q1/cve46ePddHxxY\nnXb+nXHGGXDwwfCb3/hFZSlMaQeAmdU2s1lmNj663dHMppvZAjN7wszqRdvrR7cXRvd3SPe5k+66\n67wJ4KKLNIKzEIXgn8C7dvV5oU491d/4b7jBm2dqUq1aPk3EokXw0EM1+1wSn0ycAVwOzKtw+1bg\nzhBCF2ANcG60/VxgTQihM3BntJ+koU4deOwx/xR49tnediuFYcYMb+o56yxo0cJ7+DzxBOyzT/Zq\nOOEEP9O88Ub48svsPa9kT1oBYGbtgJOBh6LbBhwHPBntMgpIRT8PjG4T3d832l/S0KGDj/acNs2b\nCSS/rVgB553ng7jee8//b0tKoE+f7Ndi5mcBK1b4hWYpPOmeAdwF
XAWUDUtqAawNIWyMbi8FiqKf\ni4AlANH966L9JU1nnOGTef3ud76cpOSf9et9tHeXLj7999ChPrjr/PN9jp649O7tU5Hceit8+ml8\ndUjNqHYAmNkpwMoQQsXe6JV9og87cV/Fx73AzErMrKS0tLS65SXO3Xd7n/Af/9gnAZP8EAKMH+/T\nfPzyl/5J/513PAyaNIm7OnfLLb5imNapLjzpnAEcCZxmZh8Co/Gmn7uApmZWNuVUO2B59PNSoD1A\ndH8TYPXWDxpCGBFCKA4hFLdq1SqN8pKlUSPvDbRqlZ8N5PDwDonMmwcDBvjF3Vq14PnnPQz23z/u\nyrbUvTuccw7cey8sXRp3NZJJ1Q6AEMLVIYR2IYQOwJnAKyGEHwGvAj+IdhsMjI1+HhfdJrr/lZDL\no9DyUM+e/ilt3Dj485/jrkZ2ZPx4H8w3bZpP7zFnjodBrrr+ep9/6sYb465EMqkmxgH8ChhqZgvx\nNv6R0faRQIto+1BgWA08d+JdfjmcdBJceaWm9c1VL73ki7P06OEXeq+4wgdf5bIOHeCnP/W1KebP\nj7sayRRNBVGAVq6Egw7y7oNvvunzu0humDgRTjnF+/ZPnLhz0zTnipUrYd99fcbRf/4z7mpkRzQV\nRIK1bg1/+xv85z9+JiC5YdIkb+/v3Blefjm/3vzBj6srr4Qnn/QPFpL/FAAF6vjjvVfJAw/4+q8S\nrzfegJNP9oFcEydCy5ZxV1Q9Q4d67ddcE3clkgkKgAJ2880+Udh558GSJXFXk1z/939+gXevveCV\nV/yTdL7aYw9fMWzCBP+S/KYAKGD16nnX0A0boH9/n0NmwgT44ou4K0uOmTN9SoWWLf3Nv23buCtK\n30UXwd57w9VXq7txvlMAFLjOnT0E6tXzADj+eGja1M8MLr/cL+YtX17148iumz3bX+899vA3/3bt\n4q4oM3bbzY+lkhI1L+Y79QJKkHXrvN/5lCn+NX06fP2139exIxx1FBx5pH/v1k0LgaRj7lw45hhf\nnWvyZOjUKe6KMmvTJu9ptmmTj1yuU6fq35Hs2dleQAqABNuwwVd8mjLFFwGfMsW7+gE0a+ZTTZeF\nwqGH+ic/qdr8+T6LZq1a3vOnS5e4K6oZzzwDgwb5dNHnnlv1/pI9CgDZZSHA+++XnyFMnerzz4M3\nIRUXl58hHHFE/vZkqUkLF/qb/8aN8NprfiZVqELwyeKWLfMBbQ0axF2RlFEASEaUlnoXxrIzhJKS\n8nUHunb1MCg7S9h335pboSoffPihT+b21Vc+K+t3vhN3RTXvtdfg2GN98jqNOckdCgCpEV9/7SFQ\ndpbwxhuwdq3f16ZN+RnCkUf63ES5PsVBpixZ4m/+69b5Bd+DD467ouw58UQfGLZoUe7MYJp0CgDJ\nis2bfcRxxesIH37o9zVsCIcfXh4KvXt7j5hCs2yZN/uUlvogr0wv0p7rZs2CQw7xNapvuinuagQU\nABKjZcvKw2DqVHjrLQ+KWrV8PMJdd+XelMfV9ckn3ttn2TKf3qFXr7grisdZZ/kstIsW+ZmgxEtz\nAUlsiop8lbJ77vG1bdeuhX/9C4YNg3//29vGr7nG28rzWWkp9O3rzT8vvJDcN3/wT/7r1/voc8kf\nCgCpcY0b+4CoW27xLpJnnunLV3br5l0Jc/gkdLs+/RT69fNPvOPHexNXknXu7FOOPPigvyaSHxQA\nklVt2sCjj3r/+MaNvR/5Kafk15vG2rXelDV/Powd671gBH7zGx8QNnx43JXIzlIASCz69PGLh7ff\n7iNlDzjApxf45pu4K9uxzz7zuX3mzPFpEPr3j7ui3LHXXj69yGOP+TQYkvsUABKbunW97/i778LA\ngb7sYPfuvjZuLvriC5/Vc+ZMn0PppJPirij3XHWVdwX99a/jrkR2hgJAYldUBE884b1o6tb1efMH\nDYKPPoq7snJfful1TZ8Oo0d7
YMm2mjXzi/3jx3svMMltCgDJGf36wdtvw29/6+vmduvmF4vXr4+3\nrq+/9jf8KVN8pbXvfz/eenLdpZf6tNfDhuXnBf4kUQBITqlf3+eZnzfP29qvucZnnZw4MZ56vv0W\n/uu/fHTvX//q/d1lxxo29AvBU6fmbnOeOAWA5KR99oExY+C553zuoX79vPvosmXZq2H9ejj9dHjx\nRfjLX+Ccc7L33Pnuf/7Hu4ZefbVPGS25SQEgOe2kk3y++eHDfcxA165wxx3lE9LVlA0bPHCefRb+\n/GdNd7yr6tb1QWFz5viCRJKbFACS8xo08B5Cc+d699Ff/MLnnpk8uWaeb+NG+MlP/Azkrrvgpz+t\nmecpdKef7hMC/uY38V/HkcopACRv7Luv9y4ZM8b74x99tDfLrFiRuefYtAmGDPFeSbfd5v3apXpq\n1fKL+B9+CCNGxF2NVEYBIHnFDFIpn4H06qu9S+b++8N996Xf1rx5M5x/Pvz97z5txS9+kZmak6x/\nf58s76abfByF5BYFgOSlRo28u+js2T798qWX+rKV06ZV7/FCgJ/9zHv6DB/uvY8kfWZ+FrBypTen\nSW5RAEhe69rVB5CNHu1NQb17+6Rkq1bt/GOE4E09Dz7ofdc1l01m9erlZ2233bZr/y9S8xQAkvfM\n4Ic/9CklrrwSHnnEm4VGjPBmnR0JwZt67r0Xhg71s4okL2tZU26+2ZuAfv/7uCuRihQAUjAaN/bJ\n5d56Cw48EC680M8IZsyofP8QvKnnj3+ESy7x39Wbf83o3t0v2N93n6+fILlBASAF58ADfbHyRx/1\nHiiHHgoXXwxr1my53w03+CfSCy/0xWv05l+zrr/eQ/eGG+KuRMpUOwDMrL2ZvWpm88xsrpldHm1v\nbmYvm9mC6HuzaLuZ2T1mttDMZpvZIZn6R4hszcz78s+f75/uH3jAm4UeecSbhW65xd+IhgzxgV56\n8695++xTfqH93XfjrkYgvTOAjcCVIYRuQC/gYjM7ABgGTAwhdAEmRrcBBgBdoq8LgPvTeG6RndK0\nqX+6LynxcQRDhnhzxLXXwo9/7FM81NJ5cNZcc43PFXTttXFXIpBGAIQQPg4hzIx+/hyYBxQBA4FR\n0W6jgFT080Dg0eCmAU3NrG21KxfZBT17+uRkDz3kPVHOPts/idauHXdlydKqlV90f+opePPNuKuR\njHz2MbMOQE9gOtAmhPAxeEgAraPdioCKl3+WRttEsqJWLZ/TZ8UKX7WqTp24K0qmoUOhZUsfyCfx\nSjsAzGx34CngihDCZzvatZJt28wWbmYXmFmJmZWUlpamW57INtTkE6/Gjb0JaOJEmDAh7mqSLa0/\nBTOri7/5PxZCeDravKKsaSf6vjLavhRoX+HX2wHLt37MEMKIEEJxCKG4VatW6ZQnIjnqootg7721\naEzc0ukFZMBIYF4I4Y8V7hoHDI5+HgyMrbD9nKg3UC9gXVlTkYgkS/36cOONPkbjqafiria5LFQz\nfs3sKOB1YA5QNt7yGvw6wD+AvYHFwOkhhNVRYNwHnAh8BQwJIZTs6DmKi4tDSckOdxGRPLVpE/To\n4WsvzJ2razIVLVsGX30FXbpU7/fNbEYIobiq/ar9kocQplB5uz5A30r2D8DF1X0+ESkstWv71BsD\nB/r4jPPOi7ui+CxeDJMmlX8tXOjrKfzjHzX7vNU+A8gGnQGIFLYQ4Mgj/Q1wwQJf/KfQheAj1CdN\n8hHrkyb5bfBxK336+FoX/fr5etjVUeNnACIi6TLz6TiOPhr+9KfCXIMhBP9EX/ETftl8SC1a+Bv+\nFVf4a/Cd72R3bIrOAEQkdied5NN6d+3qU3aUfZXdbtIk7gp3Xgg+BUnFN/zlUX/H1q39jb7s64AD\naqZbss4ARCRvjBwJd97pcwTNng3PPLPlCm9t2mwZDGVfHTvGf/E4BF+hrqxJZ/Lk8mVK27bd8g
2/\na9fcmndKASAisWvbFv7wh/Lb69fDokX+Sbri15gxWy4qU7cudO5ceTi0aFEztW7eDHPmlH+6nzy5\nvKZ27bzt/uijfSnMzp1z6w1/awoAEck59er5p+WuXbe979NPtw2G+fPhuee8S2mZFi0qb07q1Mkf\nf2dt2gRvv13+hv/667B6td/XoQOcfHL5J/yOHXP7DX9rugYgIgVh40bvTVNZOHzySfl+tWt7CFR2\n1tC6tb/hz5pV3qQzZQqsW+e/u+++Wzbp7LNPHP/SqukagIgkSp063uTSubN/Kq9o3brKg2HCBPjm\nm/L9mjb1APj8c7+9335wxhnenNOnjzfxFBIFgIgUvCZN4LDD/KuizZt9DELFUAB/s+/Tx69NFDIF\ngIgkVq1a3o7foQOccELc1WSfJsYVEUkoBYCISEIpAEREEkoBICKSUAoAEZGEUgCIiCSUAkBEJKEU\nACIiCZXTcwGZWSnwUdx1pKklsKrKvZJDr8eW9HqU02uxpXRej31CCK2q2imnA6AQmFnJzkzKlBR6\nPbak16OcXostZeP1UBOQiEhCKQBERBJKAVDzRsRdQI7R67ElvR7l9FpsqcZfD10DEBFJKJ0BiIgk\nlAIgg8ysvZm9ambzzGyumV0ebW9uZi+b2YLoe7O4a80WM6ttZrPMbHx0u6OZTY9eiyfMbBdWZ81v\nZtbUzJ40s3ejY6R3wo+Nn0d/J++Y2eNmtluSjg8ze9jMVprZOxW2VXo8mLvHzBaa2WwzOyQTNSgA\nMmsjcGUIoRvQC7jYzA4AhgETQwhdgInR7aS4HJhX4fatwJ3Ra7EGODeWquJxN/BiCKEr0AN/XRJ5\nbJhZEXAZUBxCOBCoDZxJso6PR4ATt9q2veNhANAl+roAuD8TBSgAMiiE8HEIYWb08+f4H3gRMBAY\nFe02CkjFU2F2mVk74GTgoei2AccBT0a7JOm12APoA4wECCGsDyGsJaHHRqQO0MDM6gANgY9J0PER\nQpgMrN5q8/aOh4HAo8FNA5qaWdoLVioAaoiZdQB6AtOBNiGEj8FDAmgdX2VZdRdwFbA5ut0CWBtC\n2BjdXooHZBJ0AkqBv0ZNYg+ZWSMSemyEEJYBtwOL8Tf+dcAMknt8lNne8VAELKmwX0ZeGwVADTCz\n3YGngCtCCJ/FXU8czOwUYGUIYUbFzZXsmpRuaHWAQ4D7Qwg9gS9JSHNPZaK27YFAR2AvoBHezLG1\npBwfVamRvx0FQIaZWV38zf+xEMLT0eYVZadr0feVcdWXRUcCp5nZh8Bo/NT+LvzUtU60TztgeTzl\nZd1SYGkIYXp0+0k8EJJ4bAD0Az4IIZSGEDYATwNHkNzjo8z2joelQPsK+2XktVEAZFDUxj0SmBdC\n+GOFu8YBg6OfBwNjs11btoW/To2VAAAA/ElEQVQQrg4htAshdMAv7r0SQvgR8Crwg2i3RLwWACGE\nT4AlZrZ/tKkv8B8SeGxEFgO9zKxh9HdT9nok8vioYHvHwzjgnKg3UC9gXVlTUTo0ECyDzOwo4HVg\nDuXt3tfg1wH+AeyNH/inhxC2vvhTsMzsGOAXIYRTzKwTfkbQHJgF/DiE8G2c9WWLmR2MXxCvBywC\nhuAfwhJ5bJjZDcAP8d5zs4Dz8HbtRBwfZvY4cAw+6+cKYDjwDJUcD1FI3of3GvoKGBJCKEm7BgWA\niEgyqQlIRCShFAAiIgmlABARSSgFgIhIQikAREQSSgEgIpJQCgARkYRSAIiIJNT/A42nYqWZMF0e\nAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x24c1072ff60>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Calinski-Harabasz score for each candidate number of clusters\n",
    "fig, ax = plt.subplots()\n",
    "ax.plot(Ks, np.array(CH_scores), 'b-')\n",
    "ax.set_xlabel('Number of clusters K')\n",
    "ax.set_ylabel('Calinski-Harabasz score')\n",
    "ax.set_title('MiniBatchKMeans: CH score vs. K')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
